Carga de Archivos y metadata.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggstatsplot)
## You can cite this package as:
##      Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
##      Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167
library(haven)
library(phyloseq)
library(ggsci)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-4
library(uwot)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(ggforce)
library(glue)
library(easystats)
## # Attaching packages: easystats 0.5.2.8 (red = needs update)
## ✔ bayestestR  0.13.0    ✔ correlation 0.8.3  
## ✖ datawizard  0.6.4     ✔ effectsize  0.8.2  
## ✔ insight     0.18.8    ✔ modelbased  0.8.5  
## ✔ performance 0.10.1    ✔ parameters  0.20.0 
## ✔ report      0.5.5.1   ✔ see         0.7.4  
## 
## Restart the R-Session and update packages in red with `easystats::easystats_update()`.
library(ggrepel)
library(gamlss)
## Loading required package: splines
## Loading required package: gamlss.data
## 
## Attaching package: 'gamlss.data'
## 
## The following object is masked from 'package:datasets':
## 
##     sleep
## 
## Loading required package: gamlss.dist
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse
## 
## Loading required package: parallel
##  **********   GAMLSS Version 5.4-10  ********** 
## For more on GAMLSS look at https://www.gamlss.com/
## Type gamlssNews() to see new features/changes/bug fixes.
library(emmeans)
library(mixOmics)
## 
## Loaded mixOmics 6.22.0
## Thank you for using mixOmics!
## Tutorials: http://mixomics.org
## Bookdown vignette: https://mixomicsteam.github.io/Bookdown
## Questions, issues: Follow the prompts at http://mixomics.org/contact-us
## Cite us:  citation('mixOmics')
## 
## 
## Attaching package: 'mixOmics'
## 
## The following object is masked from 'package:purrr':
## 
##     map
source("/storage/megalodon/R/import_kraken.R")
source("/storage/megalodon/R/kraken_heat_tree.R")
setwd("/storage/mariaInsenser/")
files <- dir("./reports/", pattern = "report", full.names = T)


reports <- import_kraken(files,20)
## Loading required package: doParallel
## Loading required package: foreach
## 
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
## Loading required package: iterators
metadata <- read_tsv("LastMetadata.tsv") %>% 
  mutate(Dieta = as.factor(Dieta)) %>% 
  mutate(Sexo = as.factor(Sexo)) %>%
  mutate(Tratamiento = as.factor(Tratamiento)) %>% 
  mutate(Castrado = as.factor(Castrado))
## Registered S3 method overwritten by 'bit':
##   method   from  
##   print.ri gamlss
## Rows: 234 Columns: 94
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (7): Sample, Position, Run, Code_Sample, Description, GRUPO, Madre
## dbl (87): NumReads, Code_Microbiota, Studio, Orden, Heces, Grupos, Grupo3, D...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metadata <- metadata %>% mutate(Description = ifelse(grepl("solo",Description),"Destete",Description))

design <- read_tsv("DesignStudy")
## Rows: 12 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): Group, Sex, Diet, Treatment
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
table <- reports %>% 
  distinct() %>% 
  inner_join(metadata)
## Joining, by = "Sample"

Quality Control

Distribucion de reads en cada grupo

table %>% mutate(Studio = as.factor(Studio)) %>% 
  filter(Name =="root") %>% 
  ggplot(aes(x=GRUPO, y = Reads, fill = Studio)) + 
  geom_boxplot() + facet_wrap(Studio ~ ., scale = "free_x")

Numero de reasd classificados y no clasificados

table %>% 
  filter(depth ==1) %>% 
  mutate(Name2 = gsub("root","classified",Name2)) %>% 
  distinct() %>%
  ggplot(aes(x=Sample, y = Prop, fill = Name2)) + 
  geom_col() +
  coord_flip() + scale_fill_d3()

Analysis

Cuestion 1:

  Machos vs Hembras (Dieta NO, Tratamiento Control, Studio 3, Castrados no) 
  formula: ~ Sexo

Creamos la OTU-table para resolver la pregunta.

samplingSize <- table %>% 
  filter(Name2 == "Bacteria") %>% 
  filter(Studio ==3) %>% 
  filter(Description =="Adulta") %>% 
  filter(Castrado == 0) %>% 
  filter(Tratamiento ==0) %>% 
  filter(Dieta ==0) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)

otu_table <- table %>% 
  filter(!grepl("S",TaxRank)) %>% 
  filter(!grepl("U",TaxRank)) %>% 
  filter(!grepl("R",TaxRank)) %>% 
  filter(!grepl("D",TaxRank)) %>% 
  filter(Studio ==3) %>% 
  filter(Description =="Adulta") %>% 
  filter(Castrado == 0) %>% 
  filter(Tratamiento ==0) %>% 
  filter(Dieta ==0)  %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = Sample,
              values_from = Reads,
              values_fill =0) %>% 
  column_to_rownames("NCBI") %>% 
  as.matrix() %>% 
  t() %>% 
  rrarefy(sample = samplingSize$TotalReads) %>% 
  t()

Alpha Diversidad

Calculamos la alpha diversity

simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>% 
  as.data.frame() %>%
  rownames_to_column("Sample") %>%
  rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  rename(Shannon = 2)

div <- inner_join(simpson, shannon) %>% 
  pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>% 
  inner_join(metadata)
## Joining, by = "Sample"
## Joining, by = "Sample"

Análisis del indice de Simpson

mod_gaus <- glm(DiversityValue ~ Sexo, data = div %>% 
                  filter(DiversityIndex == "Simpson"),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo (formula: DiversityValue ~ Sexo). The model's explanatory power is
## moderate (R2 = 0.20). The model's intercept, corresponding to Sexo = 0, is at
## 0.93 (95% CI [0.92, 0.94], t(12) = 169.16, p < .001). Within this model:
## 
##   - The effect of Sexo [1] is statistically non-significant and positive (beta =
## 0.01, 95% CI [-1.90e-03, 0.03], t(12) = 1.72, p = 0.086; Std. beta = 0.86, 95%
## CI [-0.12, 1.83])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Sexo"))
## Marginal Contrasts Analysis
## 
## Level1 | Level2 | Difference |        95% CI |       SE | t(12) |     p
## -----------------------------------------------------------------------
## Sexo0  |  Sexo1 |      -0.01 | [-0.03, 0.00] | 7.80e-03 | -1.72 | 0.112
## 
## Marginal contrasts estimated at Sexo
## p-value adjustment method: Holm (1979)

Análisis del indice de Shannon

mod_gaus <- glm(DiversityValue ~Sexo, data = div %>% 
                  filter(DiversityIndex == "Shannon"),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo (formula: DiversityValue ~ Sexo). The model's explanatory power is
## substantial (R2 = 0.47). The model's intercept, corresponding to Sexo = 0, is
## at 3.23 (95% CI [3.08, 3.37], t(12) = 43.73, p < .001). Within this model:
## 
##   - The effect of Sexo [1] is statistically significant and positive (beta =
## 0.34, 95% CI [0.13, 0.54], t(12) = 3.25, p = 0.001; Std. beta = 1.32, 95% CI
## [0.52, 2.11])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Sexo"))
## Marginal Contrasts Analysis
## 
## Level1 | Level2 | Difference |         95% CI |   SE | t(12) |     p
## --------------------------------------------------------------------
## Sexo0  |  Sexo1 |      -0.34 | [-0.57, -0.11] | 0.10 | -3.25 | 0.007
## 
## Marginal contrasts estimated at Sexo
## p-value adjustment method: Holm (1979)

Beta Diversidad

Vamos a calcular la Beta-diversidad.

dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>% 
  as.matrix() %>% 
  as_tibble(rownames = "Sample") %>% 
  inner_join(metadata)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>% 
                     dplyr::select(Sample,contains("report")) %>%
                     column_to_rownames("Sample") %>% 
                     as.matrix() %>% 
                     as.dist(), 
                   annotation_row = dist %>% 
                     dplyr::select(Sample,Sexo) %>%
                     mutate(Sexo = as.factor(Sexo)) %>% 
                     column_to_rownames("Sample"))

X = otu_table %>% t()
Y = otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Sexo)) %>% pull(Sexo)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)

dist_ad <-dist %>% 
  dplyr::select(Sample,contains("report")) %>% 
  column_to_rownames("Sample") %>% 
  as.matrix() %>% 
  as.dist() 

adonis2(dist_ad ~ Sexo, dist %>% 
                     dplyr::select(Sample,Sexo) %>% 
                     column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_ad ~ Sexo, data = dist %>% dplyr::select(Sample, Sexo) %>% column_to_rownames("Sample"))
##          Df SumOfSqs      R2      F Pr(>F)  
## Sexo      1  0.22594 0.16207 2.3209  0.067 .
## Residual 12  1.16819 0.83793                
## Total    13  1.39413 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Análisis diferencial

• Qué taxas son las responsables de las diferencias

gamlss_table <- otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% 
  pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>% 
  inner_join(metadata) %>% 
  group_by(Sample) %>%
  mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
  ungroup() %>%
  group_by(NCBI) %>%
  mutate(Nzeros = sum(Reads ==0)) %>%
  ungroup() %>%
  mutate(NSamples = n_distinct(Sample)) %>%
  mutate(Freq = (Reads+1)/TotalReads) %>%
  mutate(ZeroFreq = Nzeros/NSamples) %>% 
  filter(ZeroFreq < 0.9) %>% 
  dplyr::select(NCBI,Reads,Freq,ZeroFreq,Sexo,Sample) %>% 
  mutate(Reads = Reads+1) %>% 
  nest_by(NCBI) %>% 
  mutate(model = list(gamlss(formula = Freq ~ Sexo, 
                             data = data, 
                             family = "BE", 
                             control = gamlss.control(n.cyc = 200, trace = F)))) %>% 
  mutate(test = list(emmeans(model, specs = "Sexo", data = data) %>% 
                       pairs() %>% 
                       as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>% 
  write_tsv("Cuestion1.tsv")
## Joining, by = "NCBI"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>% 
  #separate(contrast, c("A","B"), sep = " - ") %>% 
  mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",Name,NA)) %>% 
  ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() 
## Joining, by = "NCBI"
## Warning: Removed 878 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 35 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Cuestion 2:

Machos vs Hembras (Dieta SI/NO, Tratamiento Control, Studio 3, Castrados no) formula: ~Sexo * Dieta Creamos la OTU-table para resolver la pregunta.

samplingSize <- table %>% 
  filter(Name2 == "Bacteria") %>% 
  filter(Studio ==3) %>% 
  filter(Description =="Adulta") %>% 
  filter(Castrado == 0) %>% 
  filter(Tratamiento ==0) %>% 
  #filter(Dieta ==0) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)

otu_table <- table %>% 
  filter(!grepl("S",TaxRank)) %>% 
  filter(!grepl("U",TaxRank)) %>% 
  filter(!grepl("R",TaxRank)) %>% 
  filter(!grepl("D",TaxRank)) %>% 
  filter(Studio ==3) %>% 
  filter(Description =="Adulta") %>% 
  filter(Castrado == 0) %>% 
  filter(Tratamiento ==0) %>% 
  #filter(Dieta ==0)  %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = Sample,
              values_from = Reads,
              values_fill =0) %>% 
  column_to_rownames("NCBI") %>% 
  as.matrix() %>% 
  t() %>% 
  rrarefy(sample = samplingSize$TotalReads) %>% 
  t()

Alpha Diversidad

Calculamos la alpha diversity

simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>% 
  as.data.frame() %>%
  rownames_to_column("Sample") %>%
  rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  rename(Shannon = 2)

div <- inner_join(simpson, shannon) %>% 
  pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>% 
  inner_join(metadata)
## Joining, by = "Sample"
## Joining, by = "Sample"

Análisis del indice de Simpson

mod_gaus <- glm(DiversityValue ~ Sexo*Dieta, data = div %>% 
                  filter(DiversityIndex == "Simpson"),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo and Dieta (formula: DiversityValue ~ Sexo * Dieta). The model's
## explanatory power is weak (R2 = 0.13). The model's intercept, corresponding to
## Sexo = 0 and Dieta = 0, is at 0.93 (95% CI [0.92, 0.95], t(29) = 141.21, p <
## .001). Within this model:
## 
##   - The effect of Sexo [1] is statistically non-significant and positive (beta =
## 0.01, 95% CI [-4.99e-03, 0.03], t(29) = 1.43, p = 0.154; Std. beta = 0.75, 95%
## CI [-0.28, 1.78])
##   - The effect of Dieta [1] is statistically non-significant and positive (beta =
## 0.01, 95% CI [-6.24e-03, 0.03], t(29) = 1.24, p = 0.216; Std. beta = 0.60, 95%
## CI [-0.35, 1.55])
##   - The effect of Sexo [1] × Dieta [1] is statistically significant and negative
## (beta = -0.02, 95% CI [-0.05, -7.52e-04], t(29) = -2.02, p = 0.043; Std. beta =
## -1.40, 95% CI [-2.76, -0.04])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Sexo","Dieta"))
## Marginal Contrasts Analysis
## 
## Level1       |       Level2 | Difference |        95% CI |       SE | t(29) |      p
## ------------------------------------------------------------------------------------
## Sexo0 Dieta0 | Sexo0 Dieta1 |      -0.01 | [-0.04, 0.01] | 8.62e-03 | -1.24 | 0.802 
## Sexo0 Dieta0 | Sexo1 Dieta0 |      -0.01 | [-0.04, 0.01] | 9.35e-03 | -1.43 | 0.802 
## Sexo0 Dieta0 | Sexo1 Dieta1 |   9.24e-04 | [-0.02, 0.03] | 8.82e-03 |  0.10 | > .999
## Sexo0 Dieta1 | Sexo1 Dieta1 |       0.01 | [-0.01, 0.03] | 8.04e-03 |  1.44 | 0.802 
## Sexo1 Dieta0 | Sexo0 Dieta1 |   2.68e-03 | [-0.02, 0.03] | 8.62e-03 |  0.31 | > .999
## Sexo1 Dieta0 | Sexo1 Dieta1 |       0.01 | [-0.01, 0.04] | 8.82e-03 |  1.62 | 0.699 
## 
## Marginal contrasts estimated at Sexo, Dieta
## p-value adjustment method: Holm (1979)

Análisis del indice de Shannon

mod_gaus <- glm(DiversityValue ~Sexo * Dieta, data = div %>% 
                  filter(DiversityIndex == "Shannon"),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo and Dieta (formula: DiversityValue ~ Sexo * Dieta). The model's
## explanatory power is moderate (R2 = 0.19). The model's intercept, corresponding
## to Sexo = 0 and Dieta = 0, is at 3.23 (95% CI [3.03, 3.42], t(29) = 32.30, p <
## .001). Within this model:
## 
##   - The effect of Sexo [1] is statistically significant and positive (beta =
## 0.34, 95% CI [0.06, 0.62], t(29) = 2.40, p = 0.016; Std. beta = 1.21, 95% CI
## [0.22, 2.20])
##   - The effect of Dieta [1] is statistically non-significant and positive (beta =
## 0.18, 95% CI [-0.07, 0.44], t(29) = 1.42, p = 0.157; Std. beta = 0.66, 95% CI
## [-0.25, 1.57])
##   - The effect of Sexo [1] × Dieta [1] is statistically significant and negative
## (beta = -0.45, 95% CI [-0.82, -0.09], t(29) = -2.43, p = 0.015; Std. beta =
## -1.62, 95% CI [-2.92, -0.31])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Sexo","Dieta"))
## Marginal Contrasts Analysis
## 
## Level1       |       Level2 | Difference |        95% CI |   SE | t(29) |     p
## -------------------------------------------------------------------------------
## Sexo0 Dieta0 | Sexo0 Dieta1 |      -0.18 | [-0.55, 0.18] | 0.13 | -1.42 | 0.670
## Sexo0 Dieta0 | Sexo1 Dieta0 |      -0.34 | [-0.74, 0.06] | 0.14 | -2.40 | 0.138
## Sexo0 Dieta0 | Sexo1 Dieta1 |      -0.07 | [-0.45, 0.31] | 0.13 | -0.54 | 0.732
## Sexo0 Dieta1 | Sexo1 Dieta1 |       0.11 | [-0.23, 0.46] | 0.12 |  0.93 | 0.732
## Sexo1 Dieta0 | Sexo0 Dieta1 |       0.15 | [-0.21, 0.52] | 0.13 |  1.19 | 0.732
## Sexo1 Dieta0 | Sexo1 Dieta1 |       0.27 | [-0.11, 0.64] | 0.13 |  2.01 | 0.270
## 
## Marginal contrasts estimated at Sexo, Dieta
## p-value adjustment method: Holm (1979)

Beta Diversidad

Vamos a calcular la Beta-diversidad.

dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>% 
  as.matrix() %>% 
  as_tibble(rownames = "Sample") %>% 
  inner_join(metadata)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>% 
                     dplyr::select(Sample,contains("report")) %>%
                     column_to_rownames("Sample") %>% 
                     as.matrix() %>% 
                     as.dist(), 
                   annotation_row = dist %>% 
                     dplyr::select(Sample,Sexo,Dieta) %>%
                     mutate(Sexo = as.factor(Sexo)) %>% 
                     mutate(Dieta = as.factor(Dieta)) %>% 
                     column_to_rownames("Sample"))

X = otu_table %>% t()
Y = otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Sexo,Dieta)) %>% pull(Sexo)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)

Y = otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Sexo,Dieta)) %>% pull(Dieta)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)

dist_ad <-dist %>% 
  dplyr::select(Sample,contains("report")) %>% 
  column_to_rownames("Sample") %>% 
  as.matrix() %>% 
  as.dist() 

adonis2(dist_ad ~ Sexo * Dieta, dist %>% 
          dplyr::select(Sample,Sexo,Dieta) %>% 
          column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_ad ~ Sexo * Dieta, data = dist %>% dplyr::select(Sample, Sexo, Dieta) %>% column_to_rownames("Sample"))
##            Df SumOfSqs      R2      F Pr(>F)    
## Sexo        1  0.12868 0.04101 1.6564  0.139    
## Dieta       1  0.54336 0.17315 6.9940  0.001 ***
## Sexo:Dieta  1  0.21306 0.06790 2.7425  0.039 *  
## Residual   29  2.25297 0.71795                  
## Total      32  3.13807 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Análisis diferencial

• Qué taxas son las responsables de las diferencias

gamlss_table <- otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% 
  pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>% 
  inner_join(metadata) %>% 
  group_by(Sample) %>%
  mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
  ungroup() %>%
  group_by(NCBI) %>%
  mutate(Nzeros = sum(Reads ==0)) %>%
  ungroup() %>%
  mutate(NSamples = n_distinct(Sample)) %>%
  mutate(Freq = (Reads+1)/TotalReads) %>%
  mutate(ZeroFreq = Nzeros/NSamples) %>% 
  filter(ZeroFreq < 0.9) %>% 
  dplyr::select(NCBI,Reads,Freq,ZeroFreq,Sexo,Dieta,Sample) %>% 
  mutate(Reads = Reads+1) %>% 
  nest_by(NCBI) %>% 
  mutate(model = list(gamlss(formula = Freq ~ Sexo * Dieta, 
                             data = data, 
                             family = "BE", 
                             control = gamlss.control(n.cyc = 200, trace = F)))) %>% 
  mutate(test = list(emmeans(model, specs = c("Sexo","Dieta"), data = data) %>% 
                       pairs() %>% 
                       as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
  write_tsv("Cuestion2.tsv")
## Joining, by = "NCBI"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>% 
  #separate(contrast, c("A","B"), sep = " - ") %>% 
  mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",Name,NA)) %>% 
  ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() + facet_wrap(~contrast)
## Joining, by = "NCBI"
## Warning: Removed 5607 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 41 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 34 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Cuestion 3.1:

Machos vs Machos Castrados (Dieta NO, Tratamiento Control, Studio 3, Hembras NO) formula: ~Castrado Creamos la OTU-table para resolver la pregunta.

samplingSize <- table %>% 
  filter(Name2 == "Bacteria") %>% 
  filter(Studio ==3) %>% 
  filter(Description =="Adulta") %>% 
  filter(Sexo == 1) %>% 
  filter(Tratamiento ==0) %>% 
  filter(Dieta ==0) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)

otu_table <- table %>% 
  filter(!grepl("S",TaxRank)) %>% 
  filter(!grepl("U",TaxRank)) %>% 
  filter(!grepl("R",TaxRank)) %>% 
  filter(!grepl("D",TaxRank)) %>% 
  filter(Studio ==3) %>% 
  filter(Description =="Adulta") %>% 
  filter(Sexo == 1) %>% 
  filter(Tratamiento ==0) %>% 
  filter(Dieta ==0)  %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = Sample,
              values_from = Reads,
              values_fill =0) %>% 
  column_to_rownames("NCBI") %>% 
  as.matrix() %>% 
  t() %>% 
  rrarefy(sample = samplingSize$TotalReads) %>% 
  t()

Alpha Diversidad

Calculamos la alpha diversity

simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>% 
  as.data.frame() %>%
  rownames_to_column("Sample") %>%
  rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  rename(Shannon = 2)

div <- inner_join(simpson, shannon) %>% 
  pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>% 
  inner_join(metadata)
## Joining, by = "Sample"
## Joining, by = "Sample"

Análisis del indice de Simpson

mod_gaus <- glm(DiversityValue ~ Castrado, data = div %>% 
                  filter(DiversityIndex == "Simpson")%>% 
                  mutate(Sexo = as.factor(Sexo)),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Castrado (formula: DiversityValue ~ Castrado). The model's explanatory power is
## very weak (R2 = 8.92e-03). The model's intercept, corresponding to Castrado =
## 0, is at 0.95 (95% CI [0.93, 0.96], t(13) = 128.51, p < .001). Within this
## model:
## 
##   - The effect of Castrado [1] is statistically non-significant and positive
## (beta = 3.45e-03, 95% CI [-0.02, 0.02], t(13) = 0.34, p = 0.732; Std. beta =
## 0.18, 95% CI [-0.87, 1.23])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Castrado"))
## Marginal Contrasts Analysis
## 
## Level1    |    Level2 | Difference |        95% CI |   SE | t(13) |     p
## -------------------------------------------------------------------------
## Castrado0 | Castrado1 |  -3.45e-03 | [-0.03, 0.02] | 0.01 | -0.34 | 0.738
## 
## Marginal contrasts estimated at Castrado
## p-value adjustment method: Holm (1979)

Análisis del indice de Shannon

mod_gaus <- glm(DiversityValue ~Castrado, data = div %>% 
                  filter(DiversityIndex == "Shannon")%>% 
                  mutate(Sexo = as.factor(Sexo)),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Castrado (formula: DiversityValue ~ Castrado). The model's explanatory power is
## very weak (R2 = 6.68e-03). The model's intercept, corresponding to Castrado =
## 0, is at 3.57 (95% CI [3.35, 3.78], t(13) = 32.42, p < .001). Within this
## model:
## 
##   - The effect of Castrado [1] is statistically non-significant and positive
## (beta = 0.04, 95% CI [-0.25, 0.34], t(13) = 0.30, p = 0.767; Std. beta = 0.16,
## 95% CI [-0.89, 1.21])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Castrado"))
## Marginal Contrasts Analysis
## 
## Level1    |    Level2 | Difference |        95% CI |   SE | t(13) |     p
## -------------------------------------------------------------------------
## Castrado0 | Castrado1 |      -0.04 | [-0.37, 0.28] | 0.15 | -0.30 | 0.772
## 
## Marginal contrasts estimated at Castrado
## p-value adjustment method: Holm (1979)

Beta Diversidad

Vamos a calcular la Beta-diversidad.

dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>% 
  as.matrix() %>% 
  as_tibble(rownames = "Sample") %>% 
  inner_join(metadata)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>% 
                     dplyr::select(Sample,contains("report")) %>%
                     column_to_rownames("Sample") %>% 
                     as.matrix() %>% 
                     as.dist(), 
                   annotation_row = dist %>% 
                     dplyr::select(Sample,Castrado) %>%
                     mutate(Castrado = as.factor(Castrado)) %>% 
                     column_to_rownames("Sample"))

X = otu_table %>% t()
Y = otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Castrado)) %>% pull(Castrado)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)

dist_ad <-dist %>% 
  dplyr::select(Sample,contains("report")) %>% 
  column_to_rownames("Sample") %>% 
  as.matrix() %>% 
  as.dist() 

adonis2(dist_ad ~ Castrado, dist %>% 
          dplyr::select(Sample,Castrado) %>% 
          column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_ad ~ Castrado, data = dist %>% dplyr::select(Sample, Castrado) %>% column_to_rownames("Sample"))
##          Df SumOfSqs      R2      F Pr(>F)
## Castrado  1   0.0457 0.03321 0.4465  0.827
## Residual 13   1.3304 0.96679              
## Total    14   1.3761 1.00000

Análisis diferencial

• Qué taxas son las responsables de las diferencias

gamlss_table <- otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% 
  pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>% 
  inner_join(metadata) %>% 
  group_by(Sample) %>%
  mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
  ungroup() %>%
  group_by(NCBI) %>%
  mutate(Nzeros = sum(Reads ==0)) %>%
  ungroup() %>%
  mutate(NSamples = n_distinct(Sample)) %>%
  mutate(Freq = (Reads+1)/TotalReads) %>%
  mutate(ZeroFreq = Nzeros/NSamples) %>% 
  filter(ZeroFreq < 0.9) %>% 
  dplyr::select(NCBI,Reads,Freq,ZeroFreq,Castrado,Sample) %>% 
  mutate(Reads = Reads+1) %>% 
  nest_by(NCBI) %>% 
  mutate(model = list(gamlss(formula = Freq ~ Castrado, 
                             data = data, 
                             family = "BE", 
                             control = gamlss.control(n.cyc = 200, trace = F)))) %>% 
  mutate(test = list(emmeans(model, specs = "Castrado", data = data) %>% 
                       pairs() %>% 
                       as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>% 
  write_tsv("Cuestion3.1.tsv")
## Joining, by = "NCBI"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>% 
  #separate(contrast, c("A","B"), sep = " - ") %>% 
  mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",Name,NA)) %>% 
  ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() 
## Joining, by = "NCBI"
## Warning: Removed 1065 rows containing missing values (`geom_text_repel()`).

Cuestion 3.2:

Hembras Tratamiento control vs Hembras Tratamiento (Dieta NO, Studio3, Machos NO) formula: ~Tratamiento

Creamos la OTU-table para resolver la pregunta.

samplingSize <- table %>% 
  filter(Name2 == "Bacteria") %>% 
  filter(Studio ==3) %>% 
  filter(Description =="Adulta") %>% 
  filter(Sexo == 0) %>% 
  #filter(Tratamiento ==0) %>% 
  filter(Dieta ==0) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)

otu_table <- table %>% 
  filter(!grepl("S",TaxRank)) %>% 
  filter(!grepl("U",TaxRank)) %>% 
  filter(!grepl("R",TaxRank)) %>% 
  filter(!grepl("D",TaxRank)) %>% 
  filter(Studio ==3) %>% 
  filter(Description =="Adulta") %>% 
  filter(Sexo == 0) %>% 
  #filter(Tratamiento ==0) %>% 
  filter(Dieta ==0)  %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = Sample,
              values_from = Reads,
              values_fill =0) %>% 
  column_to_rownames("NCBI") %>% 
  as.matrix() %>% 
  t() %>% 
  rrarefy(sample = samplingSize$TotalReads) %>% 
  t()

Alpha Diversidad

Calculamos la alpha diversity

simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>% 
  as.data.frame() %>%
  rownames_to_column("Sample") %>%
  rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  rename(Shannon = 2)

div <- inner_join(simpson, shannon) %>% 
  pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>% 
  inner_join(metadata)
## Joining, by = "Sample"
## Joining, by = "Sample"

Análisis del indice de Simpson

mod_gaus <- glm(DiversityValue ~ Tratamiento, data = div %>% 
                  filter(DiversityIndex == "Simpson"),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Tratamiento (formula: DiversityValue ~ Tratamiento). The model's explanatory
## power is substantial (R2 = 0.72). The model's intercept, corresponding to
## Tratamiento = 0, is at 0.93 (95% CI [0.93, 0.94], t(15) = 334.92, p < .001).
## Within this model:
## 
##   - The effect of Tratamiento [1] is statistically significant and positive (beta
## = 0.02, 95% CI [0.02, 0.03], t(15) = 6.24, p < .001; Std. beta = 1.67, 95% CI
## [1.15, 2.20])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter       | Coefficient |       SE |       95% CI |  t(15) |      p
## -------------------------------------------------------------------------
## (Intercept)     |        0.93 | 2.79e-03 | [0.93, 0.94] | 334.92 | < .001
## Tratamiento [1] |        0.02 | 3.63e-03 | [0.02, 0.03] |   6.24 | < .001
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Tratamiento"))
## Marginal Contrasts Analysis
## 
## Level1       |       Level2 | Difference |         95% CI |       SE | t(15) |      p
## -------------------------------------------------------------------------------------
## Tratamiento0 | Tratamiento1 |      -0.02 | [-0.03, -0.01] | 3.63e-03 | -6.24 | < .001
## 
## Marginal contrasts estimated at Tratamiento
## p-value adjustment method: Holm (1979)

Análisis del indice de Shannon

mod_gaus <- glm(DiversityValue ~Tratamiento, data = div %>% 
                  filter(DiversityIndex == "Shannon"),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Tratamiento (formula: DiversityValue ~ Tratamiento). The model's explanatory
## power is substantial (R2 = 0.63). The model's intercept, corresponding to
## Tratamiento = 0, is at 3.23 (95% CI [3.10, 3.35], t(15) = 48.80, p < .001).
## Within this model:
## 
##   - The effect of Tratamiento [1] is statistically significant and positive (beta
## = 0.43, 95% CI [0.27, 0.60], t(15) = 5.04, p < .001; Std. beta = 1.56, 95% CI
## [0.95, 2.17])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter       | Coefficient |   SE |       95% CI | t(15) |      p
## --------------------------------------------------------------------
## (Intercept)     |        3.23 | 0.07 | [3.10, 3.35] | 48.80 | < .001
## Tratamiento [1] |        0.43 | 0.09 | [0.27, 0.60] |  5.04 | < .001
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Tratamiento"))
## Marginal Contrasts Analysis
## 
## Level1       |       Level2 | Difference |         95% CI |   SE | t(15) |      p
## ---------------------------------------------------------------------------------
## Tratamiento0 | Tratamiento1 |      -0.43 | [-0.62, -0.25] | 0.09 | -5.04 | < .001
## 
## Marginal contrasts estimated at Tratamiento
## p-value adjustment method: Holm (1979)

Beta Diversidad

Vamos a calcular la Beta-diversidad.

dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>% 
  as.matrix() %>% 
  as_tibble(rownames = "Sample") %>% 
  inner_join(metadata)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>% 
                     dplyr::select(Sample,contains("report")) %>%
                     column_to_rownames("Sample") %>% 
                     as.matrix() %>% 
                     as.dist(), 
                   annotation_row = dist %>% 
                     dplyr::select(Sample,Tratamiento) %>%
                     mutate(Tratamiento = as.factor(Tratamiento)) %>% 
                     column_to_rownames("Sample"))

X = otu_table %>% t()
Y = otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Tratamiento)) %>% pull(Tratamiento)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)

dist_ad <-dist %>% 
  dplyr::select(Sample,contains("report")) %>% 
  column_to_rownames("Sample") %>% 
  as.matrix() %>% 
  as.dist() 

adonis2(dist_ad ~ Tratamiento, dist %>% 
          dplyr::select(Sample,Tratamiento) %>% 
          column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_ad ~ Tratamiento, data = dist %>% dplyr::select(Sample, Tratamiento) %>% column_to_rownames("Sample"))
##             Df SumOfSqs      R2      F Pr(>F)  
## Tratamiento  1  0.15876 0.12138 2.0722   0.08 .
## Residual    15  1.14922 0.87862                
## Total       16  1.30798 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Análisis diferencial

• Qué taxas son las responsables de las diferencias

gamlss_table <- otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% 
  pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>% 
  inner_join(metadata) %>% 
  group_by(Sample) %>%
  mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
  ungroup() %>%
  group_by(NCBI) %>%
  mutate(Nzeros = sum(Reads ==0)) %>%
  ungroup() %>%
  mutate(NSamples = n_distinct(Sample)) %>%
  mutate(Freq = (Reads+1)/TotalReads) %>%
  mutate(ZeroFreq = Nzeros/NSamples) %>% 
  filter(ZeroFreq < 0.9) %>% 
  dplyr::select(NCBI,Reads,Freq,ZeroFreq,Tratamiento,Sample) %>% 
  mutate(Reads = Reads+1) %>% 
  nest_by(NCBI) %>% 
  mutate(model = list(gamlss(formula = Freq ~ Tratamiento, 
                             data = data, 
                             family = "BE", 
                             control = gamlss.control(n.cyc = 200, trace = F)))) %>% 
  mutate(test = list(emmeans(model, specs = "Tratamiento", data = data) %>% 
                       pairs() %>% 
                       as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>% 
  write_tsv("Cuestion3.2.tsv")
## Joining, by = "NCBI"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>% 
  #separate(contrast, c("A","B"), sep = " - ") %>% 
  mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",Name,NA)) %>% 
  ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() 
## Joining, by = "NCBI"
## Warning: Removed 1026 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Cuestion 4.1:

Machos vs Machos Castrados (Dieta SI/NO, Tratamiento Control, Studio 3, Hembras NO) formula: ~Castrado * Dieta

Creamos la OTU-table para resolver la pregunta.

samplingSize <- table %>% 
  filter(Name2 == "Bacteria") %>% 
  filter(Studio ==3) %>% 
  filter(Description =="Adulta") %>% 
  filter(Sexo == 1) %>% 
  filter(Tratamiento ==0) %>% 
  #filter(Dieta ==0) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)

otu_table <- table %>% 
  filter(!grepl("S",TaxRank)) %>% 
  filter(!grepl("U",TaxRank)) %>% 
  filter(!grepl("R",TaxRank)) %>% 
  filter(!grepl("D",TaxRank)) %>% 
  filter(Studio ==3) %>% 
  filter(Description =="Adulta") %>% 
  filter(Sexo == 1) %>% 
  filter(Tratamiento ==0) %>% 
  #filter(Dieta ==0)  %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = Sample,
              values_from = Reads,
              values_fill =0) %>% 
  column_to_rownames("NCBI") %>% 
  as.matrix() %>% 
  t() %>% 
  rrarefy(sample = samplingSize$TotalReads) %>% 
  t()

Alpha Diversidad

Calculamos la alpha diversity

simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>% 
  as.data.frame() %>%
  rownames_to_column("Sample") %>%
  rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  rename(Shannon = 2)

div <- inner_join(simpson, shannon) %>% 
  pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>% 
  inner_join(metadata)
## Joining, by = "Sample"
## Joining, by = "Sample"

Análisis del indice de Simpson

mod_gaus <- glm(DiversityValue ~ Castrado * Dieta, data = div %>% 
                  filter(DiversityIndex == "Simpson"),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Castrado and Dieta (formula: DiversityValue ~ Castrado * Dieta). The model's
## explanatory power is moderate (R2 = 0.15). The model's intercept, corresponding
## to Castrado = 0 and Dieta = 0, is at 0.95 (95% CI [0.93, 0.96], t(28) = 135.78,
## p < .001). Within this model:
## 
##   - The effect of Castrado [1] is statistically non-significant and positive
## (beta = 3.40e-03, 95% CI [-0.02, 0.02], t(28) = 0.36, p = 0.722; Std. beta =
## 0.18, 95% CI [-0.81, 1.16])
##   - The effect of Dieta [1] is statistically non-significant and negative (beta =
## -0.01, 95% CI [-0.03, 3.73e-03], t(28) = -1.56, p = 0.119; Std. beta = -0.76,
## 95% CI [-1.72, 0.20])
##   - The effect of Castrado [1] × Dieta [1] is statistically non-significant and
## positive (beta = 2.16e-03, 95% CI [-0.02, 0.03], t(28) = 0.16, p = 0.869; Std.
## beta = 0.11, 95% CI [-1.24, 1.46])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter                | Coefficient |       SE |        95% CI |  t(28) |      p
## -----------------------------------------------------------------------------------
## (Intercept)              |        0.95 | 6.97e-03 | [ 0.93, 0.96] | 135.78 | < .001
## Castrado [1]             |    3.40e-03 | 9.55e-03 | [-0.02, 0.02] |   0.36 | 0.722 
## Dieta [1]                |       -0.01 | 9.30e-03 | [-0.03, 0.00] |  -1.56 | 0.119 
## Castrado [1] × Dieta [1] |    2.16e-03 |     0.01 | [-0.02, 0.03] |   0.16 | 0.869
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Castrado","Dieta"))
## Marginal Contrasts Analysis
## 
## Level1           |           Level2 | Difference |        95% CI |       SE | t(28) |      p
## --------------------------------------------------------------------------------------------
## Castrado0 Dieta0 | Castrado0 Dieta1 |       0.01 | [-0.01, 0.04] | 9.30e-03 |  1.56 | 0.651 
## Castrado0 Dieta0 | Castrado1 Dieta0 |  -3.40e-03 | [-0.03, 0.02] | 9.55e-03 | -0.36 | > .999
## Castrado0 Dieta0 | Castrado1 Dieta1 |   8.94e-03 | [-0.02, 0.04] | 9.55e-03 |  0.94 | > .999
## Castrado0 Dieta1 | Castrado1 Dieta1 |  -5.56e-03 | [-0.03, 0.02] | 8.97e-03 | -0.62 | > .999
## Castrado1 Dieta0 | Castrado0 Dieta1 |       0.02 | [-0.01, 0.04] | 8.97e-03 |  2.00 | 0.334 
## Castrado1 Dieta0 | Castrado1 Dieta1 |       0.01 | [-0.01, 0.04] | 9.23e-03 |  1.34 | 0.768 
## 
## Marginal contrasts estimated at Castrado, Dieta
## p-value adjustment method: Holm (1979)

Análisis del indice de Shannon

mod_gaus <- glm(DiversityValue ~Castrado * Dieta, data = div %>% 
                  filter(DiversityIndex == "Shannon"),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Castrado and Dieta (formula: DiversityValue ~ Castrado * Dieta). The model's
## explanatory power is moderate (R2 = 0.19). The model's intercept, corresponding
## to Castrado = 0 and Dieta = 0, is at 3.57 (95% CI [3.36, 3.78], t(28) = 33.41,
## p < .001). Within this model:
## 
##   - The effect of Castrado [1] is statistically non-significant and positive
## (beta = 0.04, 95% CI [-0.24, 0.33], t(28) = 0.29, p = 0.775; Std. beta = 0.14,
## 95% CI [-0.82, 1.10])
##   - The effect of Dieta [1] is statistically non-significant and negative (beta =
## -0.27, 95% CI [-0.55, 9.79e-03], t(28) = -1.89, p = 0.059; Std. beta = -0.90,
## 95% CI [-1.84, 0.03])
##   - The effect of Castrado [1] × Dieta [1] is statistically non-significant and
## positive (beta = 0.06, 95% CI [-0.33, 0.46], t(28) = 0.31, p = 0.755; Std. beta
## = 0.21, 95% CI [-1.11, 1.53])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter                | Coefficient |   SE |        95% CI | t(28) |      p
## ------------------------------------------------------------------------------
## (Intercept)              |        3.57 | 0.11 | [ 3.36, 3.78] | 33.41 | < .001
## Castrado [1]             |        0.04 | 0.15 | [-0.24, 0.33] |  0.29 | 0.775 
## Dieta [1]                |       -0.27 | 0.14 | [-0.55, 0.01] | -1.89 | 0.059 
## Castrado [1] × Dieta [1] |        0.06 | 0.20 | [-0.33, 0.46] |  0.31 | 0.755
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Castrado","Dieta"))
## Marginal Contrasts Analysis
## 
## Level1           |           Level2 | Difference |        95% CI |   SE | t(28) |     p
## ---------------------------------------------------------------------------------------
## Castrado0 Dieta0 | Castrado0 Dieta1 |       0.27 | [-0.13, 0.67] | 0.14 |  1.89 | 0.345
## Castrado0 Dieta0 | Castrado1 Dieta0 |      -0.04 | [-0.46, 0.37] | 0.15 | -0.29 | 0.908
## Castrado0 Dieta0 | Castrado1 Dieta1 |       0.16 | [-0.25, 0.58] | 0.15 |  1.13 | 0.806
## Castrado0 Dieta1 | Castrado1 Dieta1 |      -0.10 | [-0.49, 0.29] | 0.14 | -0.76 | 0.908
## Castrado1 Dieta0 | Castrado0 Dieta1 |       0.31 | [-0.08, 0.70] | 0.14 |  2.27 | 0.188
## Castrado1 Dieta0 | Castrado1 Dieta1 |       0.21 | [-0.19, 0.61] | 0.14 |  1.46 | 0.618
## 
## Marginal contrasts estimated at Castrado, Dieta
## p-value adjustment method: Holm (1979)

Beta Diversidad

Vamos a calcular la Beta-diversidad.

dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>% 
  as.matrix() %>% 
  as_tibble(rownames = "Sample") %>% 
  inner_join(metadata)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>% 
                     dplyr::select(Sample,contains("report")) %>%
                     column_to_rownames("Sample") %>% 
                     as.matrix() %>% 
                     as.dist(), 
                   annotation_row = dist %>% 
                     dplyr::select(Sample,Castrado,Dieta) %>%
                     mutate(Castrado = as.factor(Castrado)) %>% 
                     mutate(Dieta = as.factor(Dieta)) %>%
                     column_to_rownames("Sample"))

X = otu_table %>% t()
Y = otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Castrado)) %>% pull(Castrado)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)

Y = otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Dieta)) %>% pull(Dieta)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)

dist_ad <-dist %>% 
  dplyr::select(Sample,contains("report")) %>% 
  column_to_rownames("Sample") %>% 
  as.matrix() %>% 
  as.dist() 

adonis2(dist_ad ~ Dieta * Castrado, dist %>% 
          dplyr::select(Sample,Dieta,Castrado) %>% 
          column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_ad ~ Dieta * Castrado, data = dist %>% dplyr::select(Sample, Dieta, Castrado) %>% column_to_rownames("Sample"))
##                Df SumOfSqs      R2       F Pr(>F)    
## Dieta           1   1.0342 0.29052 12.5845  0.001 ***
## Castrado        1   0.0725 0.02035  0.8817  0.475    
## Dieta:Castrado  1   0.1521 0.04273  1.8511  0.125    
## Residual       28   2.3010 0.64639                   
## Total          31   3.5597 1.00000                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Análisis diferencial

• Qué taxas son las responsables de las diferencias

gamlss_table <- otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% 
  pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>% 
  inner_join(metadata) %>% 
  group_by(Sample) %>%
  mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
  ungroup() %>%
  group_by(NCBI) %>%
  mutate(Nzeros = sum(Reads ==0)) %>%
  ungroup() %>%
  mutate(NSamples = n_distinct(Sample)) %>%
  mutate(Freq = (Reads+1)/TotalReads) %>%
  mutate(ZeroFreq = Nzeros/NSamples) %>% 
  filter(ZeroFreq < 0.9) %>% 
  dplyr::select(NCBI,Reads,Freq,ZeroFreq,Dieta,Castrado,Sample) %>% 
  mutate(Reads = Reads+1) %>% 
  nest_by(NCBI) %>% 
  mutate(model = list(gamlss(formula = Freq ~ Dieta * Castrado, 
                             data = data, 
                             family = "BE", 
                             control = gamlss.control(n.cyc = 200, trace = F)))) %>% 
  mutate(test = list(emmeans(model, specs = c("Dieta","Castrado"), data = data) %>% 
                       pairs() %>% 
                       as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>% 
  write_tsv("Cuestion4.1.tsv")
## Joining, by = "NCBI"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>% 
  #separate(contrast, c("A","B"), sep = " - ") %>% 
  mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",Name,NA)) %>% 
  ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() + facet_wrap(~contrast)
## Joining, by = "NCBI"
## Warning: Removed 6188 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 27 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 20 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Cuestion 4.2:

Hembras Tratamiento control vs Hembras Tratamiento (Dieta SI/NO, Studio3, Machos NO) forumula: ~Tratamiento * Dieta

Creamos la OTU-table para resolver la pregunta.

samplingSize <- table %>% 
  filter(Name2 == "Bacteria") %>% 
  filter(Studio ==3) %>% 
  filter(Description =="Adulta") %>% 
  filter(Sexo == 0) %>% 
  #filter(Tratamiento ==0) %>% 
  #filter(Dieta ==0) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)

otu_table <- table %>% 
  filter(!grepl("S",TaxRank)) %>% 
  filter(!grepl("U",TaxRank)) %>% 
  filter(!grepl("R",TaxRank)) %>% 
  filter(!grepl("D",TaxRank)) %>% 
  filter(Studio ==3) %>% 
  filter(Description =="Adulta") %>% 
  filter(Sexo == 0) %>% 
  #filter(Tratamiento ==0) %>% 
  #filter(Dieta ==0)  %>% 
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = Sample,
              values_from = Reads,
              values_fill =0) %>% 
  column_to_rownames("NCBI") %>% 
  as.matrix() %>% 
  t() %>% 
  rrarefy(sample = samplingSize$TotalReads) %>% 
  t()

Alpha Diversidad

Calculamos la alpha diversity

simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>% 
  as.data.frame() %>%
  rownames_to_column("Sample") %>%
  rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  rename(Shannon = 2)

div <- inner_join(simpson, shannon) %>% 
  pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>% 
  inner_join(metadata)
## Joining, by = "Sample"
## Joining, by = "Sample"

Análisis del indice de Simpson

mod_gaus <- glm(DiversityValue ~ Tratamiento * Dieta, data = div %>% 
                  filter(DiversityIndex == "Simpson"),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Tratamiento and Dieta (formula: DiversityValue ~ Tratamiento * Dieta). The
## model's explanatory power is moderate (R2 = 0.23). The model's intercept,
## corresponding to Tratamiento = 0 and Dieta = 0, is at 0.93 (95% CI [0.92,
## 0.94], t(32) = 165.91, p < .001). Within this model:
## 
##   - The effect of Tratamiento [1] is statistically significant and positive (beta
## = 0.02, 95% CI [8.36e-03, 0.04], t(32) = 3.10, p = 0.002; Std. beta = 1.40, 95%
## CI [0.51, 2.28])
##   - The effect of Dieta [1] is statistically non-significant and positive (beta =
## 0.01, 95% CI [-3.28e-03, 0.03], t(32) = 1.51, p = 0.130; Std. beta = 0.68, 95%
## CI [-0.20, 1.57])
##   - The effect of Tratamiento [1] × Dieta [1] is statistically significant and
## negative (beta = -0.02, 95% CI [-0.04, -1.37e-03], t(32) = -2.10, p = 0.036;
## Std. beta = -1.29, 95% CI [-2.50, -0.08])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter                   | Coefficient |       SE |         95% CI |  t(32) |      p
## ---------------------------------------------------------------------------------------
## (Intercept)                 |        0.93 | 5.63e-03 | [ 0.92,  0.94] | 165.91 | < .001
## Tratamiento [1]             |        0.02 | 7.34e-03 | [ 0.01,  0.04] |   3.10 | 0.002 
## Dieta [1]                   |        0.01 | 7.34e-03 | [ 0.00,  0.03] |   1.51 | 0.130 
## Tratamiento [1] × Dieta [1] |       -0.02 |     0.01 | [-0.04,  0.00] |  -2.10 | 0.036
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Tratamiento","Dieta"))
## Marginal Contrasts Analysis
## 
## Level1              |              Level2 | Difference |         95% CI |       SE | t(32) |     p
## --------------------------------------------------------------------------------------------------
## Tratamiento0 Dieta0 | Tratamiento0 Dieta1 |      -0.01 | [-0.03,  0.01] | 7.34e-03 | -1.51 | 0.450
## Tratamiento0 Dieta0 | Tratamiento1 Dieta0 |      -0.02 | [-0.04,  0.00] | 7.34e-03 | -3.10 | 0.024
## Tratamiento0 Dieta0 | Tratamiento1 Dieta1 |      -0.01 | [-0.03,  0.01] | 7.50e-03 | -1.71 | 0.450
## Tratamiento0 Dieta1 | Tratamiento1 Dieta1 |  -1.71e-03 | [-0.02,  0.02] | 6.84e-03 | -0.25 | 0.805
## Tratamiento1 Dieta0 | Tratamiento0 Dieta1 |       0.01 | [-0.01,  0.03] | 6.66e-03 |  1.75 | 0.450
## Tratamiento1 Dieta0 | Tratamiento1 Dieta1 |   9.93e-03 | [-0.01,  0.03] | 6.84e-03 |  1.45 | 0.450
## 
## Marginal contrasts estimated at Tratamiento, Dieta
## p-value adjustment method: Holm (1979)

Análisis del indice de Shannon

mod_gaus <- glm(DiversityValue ~Tratamiento * Dieta, data = div %>% 
                  filter(DiversityIndex == "Shannon"),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Tratamiento and Dieta (formula: DiversityValue ~ Tratamiento * Dieta). The
## model's explanatory power is substantial (R2 = 0.33). The model's intercept,
## corresponding to Tratamiento = 0 and Dieta = 0, is at 3.22 (95% CI [3.06,
## 3.39], t(32) = 37.53, p < .001). Within this model:
## 
##   - The effect of Tratamiento [1] is statistically significant and positive (beta
## = 0.43, 95% CI [0.21, 0.65], t(32) = 3.87, p < .001; Std. beta = 1.63, 95% CI
## [0.81, 2.46])
##   - The effect of Dieta [1] is statistically non-significant and positive (beta =
## 0.19, 95% CI [-0.03, 0.41], t(32) = 1.70, p = 0.088; Std. beta = 0.72, 95% CI
## [-0.11, 1.55])
##   - The effect of Tratamiento [1] × Dieta [1] is statistically significant and
## negative (beta = -0.43, 95% CI [-0.73, -0.13], t(32) = -2.82, p = 0.005; Std.
## beta = -1.63, 95% CI [-2.76, -0.49])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter                   | Coefficient |   SE |         95% CI | t(32) |      p
## ----------------------------------------------------------------------------------
## (Intercept)                 |        3.22 | 0.09 | [ 3.06,  3.39] | 37.53 | < .001
## Tratamiento [1]             |        0.43 | 0.11 | [ 0.21,  0.65] |  3.87 | < .001
## Dieta [1]                   |        0.19 | 0.11 | [-0.03,  0.41] |  1.70 | 0.088 
## Tratamiento [1] × Dieta [1] |       -0.43 | 0.15 | [-0.73, -0.13] | -2.82 | 0.005
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Tratamiento","Dieta"))
## Marginal Contrasts Analysis
## 
## Level1              |              Level2 | Difference |         95% CI |   SE | t(32) |     p
## ----------------------------------------------------------------------------------------------
## Tratamiento0 Dieta0 | Tratamiento0 Dieta1 |      -0.19 | [-0.51,  0.12] | 0.11 | -1.70 | 0.294
## Tratamiento0 Dieta0 | Tratamiento1 Dieta0 |      -0.43 | [-0.75, -0.12] | 0.11 | -3.87 | 0.003
## Tratamiento0 Dieta0 | Tratamiento1 Dieta1 |      -0.19 | [-0.51,  0.13] | 0.11 | -1.68 | 0.294
## Tratamiento0 Dieta1 | Tratamiento1 Dieta1 |  -1.69e-03 | [-0.30,  0.29] | 0.10 | -0.02 | 0.987
## Tratamiento1 Dieta0 | Tratamiento0 Dieta1 |       0.24 | [-0.04,  0.53] | 0.10 |  2.38 | 0.117
## Tratamiento1 Dieta0 | Tratamiento1 Dieta1 |       0.24 | [-0.05,  0.53] | 0.10 |  2.30 | 0.117
## 
## Marginal contrasts estimated at Tratamiento, Dieta
## p-value adjustment method: Holm (1979)

Beta Diversidad

Vamos a calcular la Beta-diversidad.

dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>% 
  as.matrix() %>% 
  as_tibble(rownames = "Sample") %>% 
  inner_join(metadata)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>% 
                     dplyr::select(Sample,contains("report")) %>%
                     column_to_rownames("Sample") %>% 
                     as.matrix() %>% 
                     as.dist(), 
                   annotation_row = dist %>% 
                     dplyr::select(Sample,Tratamiento,Dieta) %>%
                     mutate(Tratamiento = as.factor(Tratamiento)) %>% 
                     mutate(Dieta = as.factor(Dieta)) %>%
                     column_to_rownames("Sample"))

X = otu_table %>% t()
Y = otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Tratamiento)) %>% pull(Tratamiento)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)

Y = otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Dieta)) %>% pull(Dieta)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)

dist_ad <-dist %>% 
  dplyr::select(Sample,contains("report")) %>% 
  column_to_rownames("Sample") %>% 
  as.matrix() %>% 
  as.dist() 

adonis2(dist_ad ~ Dieta * Tratamiento, dist %>% 
          dplyr::select(Sample,Dieta,Tratamiento) %>% 
          column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_ad ~ Dieta * Tratamiento, data = dist %>% dplyr::select(Sample, Dieta, Tratamiento) %>% column_to_rownames("Sample"))
##                   Df SumOfSqs      R2      F Pr(>F)   
## Dieta              1  0.36719 0.11681 4.6384  0.003 **
## Tratamiento        1  0.17939 0.05707 2.2661  0.059 . 
## Dieta:Tratamiento  1  0.06377 0.02029 0.8056  0.541   
## Residual          32  2.53326 0.80584                 
## Total             35  3.14362 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Análisis diferencial

• Qué taxas son las responsables de las diferencias

gamlss_table <- otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% 
  pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>% 
  inner_join(metadata) %>% 
  group_by(Sample) %>%
  mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
  ungroup() %>%
  group_by(NCBI) %>%
  mutate(Nzeros = sum(Reads ==0)) %>%
  ungroup() %>%
  mutate(NSamples = n_distinct(Sample)) %>%
  mutate(Freq = (Reads+1)/TotalReads) %>%
  mutate(ZeroFreq = Nzeros/NSamples) %>% 
  filter(ZeroFreq < 0.9) %>% 
  dplyr::select(NCBI,Reads,Freq,ZeroFreq,Dieta,Tratamiento,Sample) %>% 
  mutate(Reads = Reads+1) %>% 
  nest_by(NCBI) %>% 
  mutate(model = list(gamlss(formula = Freq ~ Dieta + Tratamiento, 
                             data = data, 
                             family = "BE", 
                             control = gamlss.control(n.cyc = 200, trace = F)))) %>% 
  mutate(test = list(emmeans(model, specs = c("Dieta","Tratamiento"), data = data) %>% 
                       pairs() %>% 
                       as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>% 
  write_tsv("Cuestion4.2.tsv")
## Joining, by = "NCBI"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>% 
  #separate(contrast, c("A","B"), sep = " - ") %>% 
  mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",Name,NA)) %>% 
  ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() + facet_wrap(~contrast)
## Joining, by = "NCBI"
## Warning: Removed 5848 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 34 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 34 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## Warning: ggrepel: 35 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Cuestion 5.1:

Crear nueva variable. Disfuncion gonadal (SI <- Machos Castrados, Hembras Tratamiento Si, El resto NO) Sampling: Hembras Dieta NO, Tratameinto SI/NO Machos Dieta NO, Tratamiento NO Machos Castrados, Dieta NO, Tratamiento NO formula = ~ Sexo * Disfuncion Gonadal

Creamos la OTU-table para resolver la pregunta.

table2 <- table %>% 
  mutate(DisfuncionGonadal = 0) %>% 
  mutate(DisfuncionGonadal = ifelse(Castrado ==1,1,DisfuncionGonadal)) %>% 
  mutate(DisfuncionGonadal = ifelse((Sexo ==0 & Tratamiento ==1),1,DisfuncionGonadal)) %>% 
  mutate(DisfuncionGonadal = as.factor(DisfuncionGonadal))

metadata2 <- metadata %>% 
  mutate(DisfuncionGonadal = 0) %>% 
  mutate(DisfuncionGonadal = ifelse(Castrado ==1,1,DisfuncionGonadal)) %>% 
  mutate(DisfuncionGonadal = ifelse((Sexo ==0 & Tratamiento ==1),1,DisfuncionGonadal)) %>% 
  mutate(DisfuncionGonadal = as.factor(DisfuncionGonadal))

samplesGroup <- metadata %>% 
  filter(Studio ==3) %>% 
  filter(Description =="Adulta") %>% 
  filter(Sexo ==0 & Dieta ==0) %>% 
  bind_rows(metadata %>% 
              filter(Studio ==3) %>% 
              filter(Description =="Adulta") %>%
              filter(Sexo ==1 &Tratamiento ==0 & Dieta==0)) %>% 
  dplyr::select(Sample)
  


samplingSize <- table2 %>% 
  filter(Name2 == "Bacteria") %>% 
  semi_join(samplesGroup) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)
## Joining, by = "Sample"
otu_table <- table2 %>% 
  filter(!grepl("S",TaxRank)) %>% 
  filter(!grepl("U",TaxRank)) %>% 
  filter(!grepl("R",TaxRank)) %>% 
  filter(!grepl("D",TaxRank)) %>% 
  semi_join(samplesGroup) %>%  
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = Sample,
              values_from = Reads,
              values_fill =0) %>% 
  column_to_rownames("NCBI") %>% 
  as.matrix() %>% 
  t() %>% 
  rrarefy(sample = samplingSize$TotalReads) %>% 
  t()
## Joining, by = "Sample"

Alpha Diversidad

Calculamos la alpha diversity

simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>% 
  as.data.frame() %>%
  rownames_to_column("Sample") %>%
  rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  rename(Shannon = 2)

div <- inner_join(simpson, shannon) %>% 
  pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>% 
  inner_join(metadata2)
## Joining, by = "Sample"
## Joining, by = "Sample"

Análisis del indice de Simpson

mod_gaus <- glm(DiversityValue ~ Sexo*DisfuncionGonadal, data = div %>% 
                  filter(DiversityIndex == "Simpson"),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo and DisfuncionGonadal (formula: DiversityValue ~ Sexo *
## DisfuncionGonadal). The model's explanatory power is substantial (R2 = 0.27).
## The model's intercept, corresponding to Sexo = 0 and DisfuncionGonadal = 0, is
## at 0.93 (95% CI [0.92, 0.94], t(28) = 171.58, p < .001). Within this model:
## 
##   - The effect of Sexo [1] is statistically non-significant and positive (beta =
## 0.01, 95% CI [-1.97e-03, 0.03], t(28) = 1.70, p = 0.088; Std. beta = 0.82, 95%
## CI [-0.12, 1.76])
##   - The effect of DisfuncionGonadal [1] is statistically significant and positive
## (beta = 0.02, 95% CI [8.60e-03, 0.04], t(28) = 3.17, p = 0.002; Std. beta =
## 1.40, 95% CI [0.54, 2.27])
##   - The effect of Sexo [1] × DisfuncionGonadal [1] is statistically
## non-significant and negative (beta = -0.02, 95% CI [-0.04, 1.37e-03], t(28) =
## -1.83, p = 0.068; Std. beta = -1.17, 95% CI [-2.43, 0.09])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter                        | Coefficient |       SE |        95% CI |  t(28) |      p
## -------------------------------------------------------------------------------------------
## (Intercept)                      |        0.93 | 5.44e-03 | [ 0.92, 0.94] | 171.58 | < .001
## Sexo [1]                         |        0.01 | 7.69e-03 | [ 0.00, 0.03] |   1.70 | 0.088 
## DisfuncionGonadal [1]            |        0.02 | 7.09e-03 | [ 0.01, 0.04] |   3.17 | 0.002 
## Sexo [1] × DisfuncionGonadal [1] |       -0.02 |     0.01 | [-0.04, 0.00] |  -1.83 | 0.068
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("DisfuncionGonadal","Sexo"))
## Marginal Contrasts Analysis
## 
## Level1                   |                   Level2 | Difference |         95% CI |       SE | t(28) |     p
## ------------------------------------------------------------------------------------------------------------
## DisfuncionGonadal0 Sexo0 | DisfuncionGonadal0 Sexo1 |      -0.01 | [-0.03,  0.01] | 7.69e-03 | -1.70 | 0.398
## DisfuncionGonadal0 Sexo0 | DisfuncionGonadal1 Sexo0 |      -0.02 | [-0.04,  0.00] | 7.09e-03 | -3.17 | 0.022
## DisfuncionGonadal0 Sexo0 | DisfuncionGonadal1 Sexo1 |      -0.02 | [-0.04,  0.00] | 7.45e-03 | -2.26 | 0.160
## DisfuncionGonadal0 Sexo1 | DisfuncionGonadal1 Sexo1 |  -3.71e-03 | [-0.02,  0.02] | 7.45e-03 | -0.50 | 0.825
## DisfuncionGonadal1 Sexo0 | DisfuncionGonadal0 Sexo1 |   9.39e-03 | [-0.01,  0.03] | 7.09e-03 |  1.32 | 0.589
## DisfuncionGonadal1 Sexo0 | DisfuncionGonadal1 Sexo1 |   5.68e-03 | [-0.01,  0.03] | 6.83e-03 |  0.83 | 0.825
## 
## Marginal contrasts estimated at DisfuncionGonadal, Sexo
## p-value adjustment method: Holm (1979)

Análisis del indice de Shannon

mod_gaus <- glm(DiversityValue ~Sexo * DisfuncionGonadal, data = div %>% 
                  filter(DiversityIndex == "Shannon"),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo and DisfuncionGonadal (formula: DiversityValue ~ Sexo *
## DisfuncionGonadal). The model's explanatory power is substantial (R2 = 0.35).
## The model's intercept, corresponding to Sexo = 0 and DisfuncionGonadal = 0, is
## at 3.22 (95% CI [3.05, 3.40], t(28) = 35.82, p < .001). Within this model:
## 
##   - The effect of Sexo [1] is statistically significant and positive (beta =
## 0.33, 95% CI [0.09, 0.58], t(28) = 2.63, p = 0.009; Std. beta = 1.19, 95% CI
## [0.30, 2.08])
##   - The effect of DisfuncionGonadal [1] is statistically significant and positive
## (beta = 0.43, 95% CI [0.20, 0.66], t(28) = 3.67, p < .001; Std. beta = 1.53,
## 95% CI [0.71, 2.35])
##   - The effect of Sexo [1] × DisfuncionGonadal [1] is statistically significant
## and negative (beta = -0.38, 95% CI [-0.72, -0.05], t(28) = -2.25, p = 0.024;
## Std. beta = -1.36, 95% CI [-2.55, -0.18])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter                        | Coefficient |   SE |         95% CI | t(28) |      p
## ---------------------------------------------------------------------------------------
## (Intercept)                      |        3.22 | 0.09 | [ 3.05,  3.40] | 35.82 | < .001
## Sexo [1]                         |        0.33 | 0.13 | [ 0.09,  0.58] |  2.63 | 0.009 
## DisfuncionGonadal [1]            |        0.43 | 0.12 | [ 0.20,  0.66] |  3.67 | < .001
## Sexo [1] × DisfuncionGonadal [1] |       -0.38 | 0.17 | [-0.72, -0.05] | -2.25 | 0.024
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("DisfuncionGonadal","Sexo"))
## Marginal Contrasts Analysis
## 
## Level1                   |                   Level2 | Difference |         95% CI |   SE | t(28) |      p
## ---------------------------------------------------------------------------------------------------------
## DisfuncionGonadal0 Sexo0 | DisfuncionGonadal0 Sexo1 |      -0.33 | [-0.70,  0.03] | 0.13 | -2.63 | 0.055 
## DisfuncionGonadal0 Sexo0 | DisfuncionGonadal1 Sexo0 |      -0.43 | [-0.76, -0.10] | 0.12 | -3.67 | 0.006 
## DisfuncionGonadal0 Sexo0 | DisfuncionGonadal1 Sexo1 |      -0.38 | [-0.73, -0.03] | 0.12 | -3.10 | 0.022 
## DisfuncionGonadal0 Sexo1 | DisfuncionGonadal1 Sexo1 |      -0.05 | [-0.40,  0.30] | 0.12 | -0.39 | > .999
## DisfuncionGonadal1 Sexo0 | DisfuncionGonadal0 Sexo1 |       0.10 | [-0.24,  0.43] | 0.12 |  0.82 | > .999
## DisfuncionGonadal1 Sexo0 | DisfuncionGonadal1 Sexo1 |       0.05 | [-0.27,  0.37] | 0.11 |  0.43 | > .999
## 
## Marginal contrasts estimated at DisfuncionGonadal, Sexo
## p-value adjustment method: Holm (1979)

Beta Diversidad

Vamos a calcular la Beta-diversidad.

dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>% 
  as.matrix() %>% 
  as_tibble(rownames = "Sample") %>% 
  inner_join(metadata2)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>% 
                     dplyr::select(Sample,contains("report")) %>%
                     column_to_rownames("Sample") %>% 
                     as.matrix() %>% 
                     as.dist(), 
                   annotation_row = dist %>% 
                     dplyr::select(Sample,Sexo,DisfuncionGonadal) %>%
                     mutate(Sexo = as.factor(Sexo)) %>% 
                     mutate(DisfuncionGonadal = as.factor(DisfuncionGonadal)) %>% 
                     column_to_rownames("Sample"))

X = otu_table %>% t()
Y = otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Sexo)) %>% pull(Sexo)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)

Y = otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% inner_join(metadata2 %>% dplyr::select(Sample,DisfuncionGonadal)) %>% pull(DisfuncionGonadal)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)

dist_ad <-dist %>% 
  dplyr::select(Sample,contains("report")) %>% 
  column_to_rownames("Sample") %>% 
  as.matrix() %>% 
  as.dist() 

adonis2(dist_ad ~ Sexo * DisfuncionGonadal, dist %>% 
          dplyr::select(Sample,Sexo,DisfuncionGonadal) %>% 
          column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_ad ~ Sexo * DisfuncionGonadal, data = dist %>% dplyr::select(Sample, Sexo, DisfuncionGonadal) %>% column_to_rownames("Sample"))
##                        Df SumOfSqs      R2      F Pr(>F)  
## Sexo                    1  0.18963 0.06585 2.1354  0.055 .
## DisfuncionGonadal       1  0.10742 0.03730 1.2097  0.299  
## Sexo:DisfuncionGonadal  1  0.09640 0.03347 1.0855  0.359  
## Residual               28  2.48646 0.86338                
## Total                  31  2.87990 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Análisis diferencial

• Qué taxas son las responsables de las diferencias

gamlss_table <- otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% 
  pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>% 
  inner_join(metadata2) %>% 
  group_by(Sample) %>%
  mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
  ungroup() %>%
  group_by(NCBI) %>%
  mutate(Nzeros = sum(Reads ==0)) %>%
  ungroup() %>%
  mutate(NSamples = n_distinct(Sample)) %>%
  mutate(Freq = (Reads+1)/TotalReads) %>%
  mutate(ZeroFreq = Nzeros/NSamples) %>% 
  filter(ZeroFreq < 0.9) %>% 
  dplyr::select(NCBI,Reads,Freq,ZeroFreq,Sexo,DisfuncionGonadal,Sample) %>% 
  mutate(Reads = Reads+1) %>% 
  nest_by(NCBI) %>% 
  mutate(model = list(gamlss(formula = Freq ~ Sexo + DisfuncionGonadal, 
                             data = data, 
                             family = "BE", 
                             control = gamlss.control(n.cyc = 200, trace = F)))) %>% 
  mutate(test = list(emmeans(model, specs = c("Sexo","DisfuncionGonadal"), data = data) %>% 
                       pairs() %>% 
                       as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>% 
  write_tsv("Cuestion5.1.tsv")
## Joining, by = "NCBI"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>% 
  #separate(contrast, c("A","B"), sep = " - ") %>% 
  mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",Name,NA)) %>% 
  ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() + facet_wrap(~contrast)
## Joining, by = "NCBI"
## Warning: Removed 6023 rows containing missing values (`geom_text_repel()`).

Cuestion 5.2

Crear nueva variable. Disfuncion gonadal (SI <- Machos Castrados, Hembras Tratamiento Si, El resto NO) Sampling: Hembras Dieta SI/NO, Tratameinto SI/NO Machos Dieta SI/NO, Tratamiento NO Machos Castrados, Dieta SI/NO, Tratamiento NO formula = ~ Sexo * Disfuncion Gonadal * Dieta

Creamos la OTU-table para resolver la pregunta.

table2 <- table %>% 
  mutate(DisfuncionGonadal = 0) %>% 
  mutate(DisfuncionGonadal = ifelse(Castrado ==1,1,DisfuncionGonadal)) %>% 
  mutate(DisfuncionGonadal = ifelse((Sexo ==0 & Tratamiento ==1),1,DisfuncionGonadal))%>% 
  mutate(DisfuncionGonadal = as.factor(DisfuncionGonadal))

metadata2 <- metadata %>% 
  mutate(DisfuncionGonadal = 0) %>% 
  mutate(DisfuncionGonadal = ifelse(Castrado ==1,1,DisfuncionGonadal)) %>% 
  mutate(DisfuncionGonadal = ifelse((Sexo ==0 & Tratamiento ==1),1,DisfuncionGonadal))%>% 
  mutate(DisfuncionGonadal = as.factor(DisfuncionGonadal))

samplesGroup <- metadata %>% 
  filter(Studio ==3) %>% 
  filter(Description =="Adulta") %>% 
  filter(Sexo ==0) %>% 
  bind_rows(metadata %>% 
              filter(Studio ==3) %>% 
              filter(Description =="Adulta") %>%
              filter(Sexo ==1 &Tratamiento ==0)) %>% 
  dplyr::select(Sample)
  


samplingSize <- table2 %>% 
  filter(Name2 == "Bacteria") %>% 
  semi_join(samplesGroup) %>% 
  group_by(Sample) %>% 
  summarise(TotalReads = sum(Reads)) %>%
  slice_min(TotalReads)
## Joining, by = "Sample"
otu_table <- table2 %>% 
  filter(!grepl("S",TaxRank)) %>% 
  filter(!grepl("U",TaxRank)) %>% 
  filter(!grepl("R",TaxRank)) %>% 
  filter(!grepl("D",TaxRank)) %>% 
  semi_join(samplesGroup) %>%  
  dplyr::select(Sample,Reads,NCBI) %>% 
  pivot_wider(names_from = Sample,
              values_from = Reads,
              values_fill =0) %>% 
  column_to_rownames("NCBI") %>% 
  as.matrix() %>% 
  t() %>% 
  rrarefy(sample = samplingSize$TotalReads) %>% 
  t()
## Joining, by = "Sample"

Alpha Diversidad

Calculamos la alpha diversity

simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>% 
  as.data.frame() %>%
  rownames_to_column("Sample") %>%
  rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>% 
  as.data.frame() %>% 
  rownames_to_column("Sample") %>% 
  rename(Shannon = 2)

div <- inner_join(simpson, shannon) %>% 
  pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>% 
  inner_join(metadata2)
## Joining, by = "Sample"
## Joining, by = "Sample"

Análisis del indice de Simpson

mod_gaus <- glm(DiversityValue ~ Sexo*DisfuncionGonadal*Dieta, data = div %>% 
                  filter(DiversityIndex == "Simpson"),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo, DisfuncionGonadal and Dieta (formula: DiversityValue ~ Sexo *
## DisfuncionGonadal * Dieta). The model's explanatory power is moderate (R2 =
## 0.20). The model's intercept, corresponding to Sexo = 0, DisfuncionGonadal = 0
## and Dieta = 0, is at 0.93 (95% CI [0.92, 0.95], t(60) = 148.63, p < .001).
## Within this model:
## 
##   - The effect of Sexo [1] is statistically non-significant and positive (beta =
## 0.01, 95% CI [-3.90e-03, 0.03], t(60) = 1.52, p = 0.128; Std. beta = 0.77, 95%
## CI [-0.22, 1.76])
##   - The effect of DisfuncionGonadal [1] is statistically significant and positive
## (beta = 0.02, 95% CI [6.52e-03, 0.04], t(60) = 2.76, p = 0.006; Std. beta =
## 1.28, 95% CI [0.37, 2.20])
##   - The effect of Dieta [1] is statistically non-significant and positive (beta =
## 0.01, 95% CI [-5.35e-03, 0.03], t(60) = 1.31, p = 0.191; Std. beta = 0.61, 95%
## CI [-0.30, 1.52])
##   - The effect of Sexo [1] × DisfuncionGonadal [1] is statistically
## non-significant and negative (beta = -0.02, 95% CI [-0.04, 3.99e-03], t(60) =
## -1.62, p = 0.104; Std. beta = -1.10, 95% CI [-2.42, 0.23])
##   - The effect of Sexo [1] × Dieta [1] is statistically significant and negative
## (beta = -0.03, 95% CI [-0.05, -2.16e-03], t(60) = -2.14, p = 0.032; Std. beta =
## -1.43, 95% CI [-2.74, -0.12])
##   - The effect of DisfuncionGonadal [1] × Dieta [1] is statistically
## non-significant and negative (beta = -0.02, 95% CI [-0.04, 1.50e-03], t(60) =
## -1.83, p = 0.068; Std. beta = -1.16, 95% CI [-2.41, 0.09])
##   - The effect of (Sexo [1] × DisfuncionGonadal [1]) × Dieta [1] is statistically
## non-significant and positive (beta = 0.02, 95% CI [-9.46e-03, 0.05], t(60) =
## 1.38, p = 0.168; Std. beta = 1.28, 95% CI [-0.54, 3.09])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter                                      | Coefficient |       SE |         95% CI |  t(60) |      p
## ----------------------------------------------------------------------------------------------------------
## (Intercept)                                    |        0.93 | 6.28e-03 | [ 0.92,  0.95] | 148.63 | < .001
## Sexo [1]                                       |        0.01 | 8.88e-03 | [ 0.00,  0.03] |   1.52 | 0.128 
## DisfuncionGonadal [1]                          |        0.02 | 8.19e-03 | [ 0.01,  0.04] |   2.76 | 0.006 
## Dieta [1]                                      |        0.01 | 8.19e-03 | [-0.01,  0.03] |   1.31 | 0.191 
## Sexo [1] × DisfuncionGonadal [1]               |       -0.02 |     0.01 | [-0.04,  0.00] |  -1.62 | 0.104 
## Sexo [1] × Dieta [1]                           |       -0.03 |     0.01 | [-0.05,  0.00] |  -2.14 | 0.032 
## DisfuncionGonadal [1] × Dieta [1]              |       -0.02 |     0.01 | [-0.04,  0.00] |  -1.83 | 0.068 
## (Sexo [1] × DisfuncionGonadal [1]) × Dieta [1] |        0.02 |     0.02 | [-0.01,  0.05] |   1.38 | 0.168
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("DisfuncionGonadal","Sexo","Dieta"))
## Marginal Contrasts Analysis
## 
## Level1                          |                          Level2 | Difference |        95% CI |       SE | t(60) |      p
## --------------------------------------------------------------------------------------------------------------------------
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 |      -0.01 | [-0.04, 0.02] | 8.19e-03 | -1.31 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta0 |      -0.01 | [-0.04, 0.02] | 8.88e-03 | -1.52 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 |   9.09e-04 | [-0.03, 0.03] | 8.37e-03 |  0.11 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta0 |      -0.02 | [-0.05, 0.00] | 8.19e-03 | -2.76 | 0.209 
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 |      -0.01 | [-0.04, 0.01] | 8.37e-03 | -1.53 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta0 |      -0.02 | [-0.04, 0.01] | 8.60e-03 | -1.95 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 |  -4.35e-03 | [-0.03, 0.02] | 8.60e-03 | -0.51 | > .999
## DisfuncionGonadal0 Sexo0 Dieta1 | DisfuncionGonadal0 Sexo1 Dieta1 |       0.01 | [-0.01, 0.04] | 7.63e-03 |  1.52 | > .999
## DisfuncionGonadal0 Sexo0 Dieta1 | DisfuncionGonadal1 Sexo0 Dieta1 |  -2.12e-03 | [-0.03, 0.02] | 7.63e-03 | -0.28 | > .999
## DisfuncionGonadal0 Sexo0 Dieta1 | DisfuncionGonadal1 Sexo1 Dieta1 |   6.35e-03 | [-0.02, 0.03] | 7.88e-03 |  0.81 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 |   2.81e-03 | [-0.02, 0.03] | 8.19e-03 |  0.34 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 |       0.01 | [-0.01, 0.04] | 8.37e-03 |  1.72 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 |   6.85e-04 | [-0.03, 0.03] | 8.37e-03 |  0.08 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta0 |  -3.28e-03 | [-0.03, 0.02] | 8.60e-03 | -0.38 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 |   9.16e-03 | [-0.02, 0.04] | 8.60e-03 |  1.06 | > .999
## DisfuncionGonadal0 Sexo1 Dieta1 | DisfuncionGonadal1 Sexo1 Dieta1 |  -5.26e-03 | [-0.03, 0.02] | 8.07e-03 | -0.65 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 |       0.01 | [-0.01, 0.04] | 7.43e-03 |  1.60 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta0 |   9.06e-03 | [-0.02, 0.04] | 8.19e-03 |  1.11 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 |       0.02 | [ 0.00, 0.05] | 7.63e-03 |  3.07 | 0.089 
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 |   9.75e-03 | [-0.02, 0.03] | 7.63e-03 |  1.28 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta0 |   5.78e-03 | [-0.02, 0.03] | 7.88e-03 |  0.73 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 |       0.02 | [-0.01, 0.04] | 7.88e-03 |  2.31 | 0.631 
## DisfuncionGonadal1 Sexo0 Dieta1 | DisfuncionGonadal0 Sexo1 Dieta1 |       0.01 | [-0.01, 0.04] | 7.83e-03 |  1.75 | > .999
## DisfuncionGonadal1 Sexo0 Dieta1 | DisfuncionGonadal1 Sexo1 Dieta1 |   8.47e-03 | [-0.02, 0.03] | 8.07e-03 |  1.05 | > .999
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 |   6.09e-03 | [-0.02, 0.03] | 7.88e-03 |  0.77 | > .999
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 |       0.02 | [-0.01, 0.04] | 8.07e-03 |  2.19 | 0.807 
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 |   3.97e-03 | [-0.02, 0.03] | 8.07e-03 |  0.49 | > .999
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 |       0.01 | [-0.01, 0.04] | 8.31e-03 |  1.50 | > .999
## 
## Marginal contrasts estimated at DisfuncionGonadal, Sexo, Dieta
## p-value adjustment method: Holm (1979)

Análisis del indice de Shannon

mod_gaus <- glm(DiversityValue ~Sexo * DisfuncionGonadal * Dieta, data = div %>% 
                  filter(DiversityIndex == "Shannon"),
                family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo, DisfuncionGonadal and Dieta (formula: DiversityValue ~ Sexo *
## DisfuncionGonadal * Dieta). The model's explanatory power is moderate (R2 =
## 0.25). The model's intercept, corresponding to Sexo = 0, DisfuncionGonadal = 0
## and Dieta = 0, is at 3.22 (95% CI [3.04, 3.41], t(60) = 33.52, p < .001).
## Within this model:
## 
##   - The effect of Sexo [1] is statistically significant and positive (beta =
## 0.34, 95% CI [0.07, 0.61], t(60) = 2.50, p = 0.013; Std. beta = 1.22, 95% CI
## [0.26, 2.18])
##   - The effect of DisfuncionGonadal [1] is statistically significant and positive
## (beta = 0.43, 95% CI [0.19, 0.68], t(60) = 3.44, p < .001; Std. beta = 1.55,
## 95% CI [0.66, 2.43])
##   - The effect of Dieta [1] is statistically non-significant and positive (beta =
## 0.18, 95% CI [-0.06, 0.43], t(60) = 1.47, p = 0.141; Std. beta = 0.66, 95% CI
## [-0.22, 1.55])
##   - The effect of Sexo [1] × DisfuncionGonadal [1] is statistically significant
## and negative (beta = -0.39, 95% CI [-0.75, -0.03], t(60) = -2.14, p = 0.032;
## Std. beta = -1.40, 95% CI [-2.68, -0.12])
##   - The effect of Sexo [1] × Dieta [1] is statistically significant and negative
## (beta = -0.45, 95% CI [-0.80, -0.10], t(60) = -2.51, p = 0.012; Std. beta =
## -1.61, 95% CI [-2.88, -0.35])
##   - The effect of DisfuncionGonadal [1] × Dieta [1] is statistically significant
## and negative (beta = -0.42, 95% CI [-0.76, -0.09], t(60) = -2.47, p = 0.014;
## Std. beta = -1.52, 95% CI [-2.73, -0.31])
##   - The effect of (Sexo [1] × DisfuncionGonadal [1]) × Dieta [1] is statistically
## non-significant and positive (beta = 0.48, 95% CI [-6.83e-03, 0.97], t(60) =
## 1.93, p = 0.053; Std. beta = 1.73, 95% CI [-0.02, 3.48])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter                                      | Coefficient |   SE |         95% CI | t(60) |      p
## -----------------------------------------------------------------------------------------------------
## (Intercept)                                    |        3.22 | 0.10 | [ 3.04,  3.41] | 33.52 | < .001
## Sexo [1]                                       |        0.34 | 0.14 | [ 0.07,  0.61] |  2.50 | 0.013 
## DisfuncionGonadal [1]                          |        0.43 | 0.13 | [ 0.19,  0.68] |  3.44 | < .001
## Dieta [1]                                      |        0.18 | 0.13 | [-0.06,  0.43] |  1.47 | 0.141 
## Sexo [1] × DisfuncionGonadal [1]               |       -0.39 | 0.18 | [-0.75, -0.03] | -2.14 | 0.032 
## Sexo [1] × Dieta [1]                           |       -0.45 | 0.18 | [-0.80, -0.10] | -2.51 | 0.012 
## DisfuncionGonadal [1] × Dieta [1]              |       -0.42 | 0.17 | [-0.76, -0.09] | -2.47 | 0.014 
## (Sexo [1] × DisfuncionGonadal [1]) × Dieta [1] |        0.48 | 0.25 | [-0.01,  0.97] |  1.93 | 0.053
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("DisfuncionGonadal","Sexo","Dieta"))
## Marginal Contrasts Analysis
## 
## Level1                          |                          Level2 | Difference |         95% CI |   SE | t(60) |      p
## -----------------------------------------------------------------------------------------------------------------------
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 |      -0.18 | [-0.59,  0.23] | 0.13 | -1.47 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta0 |      -0.34 | [-0.78,  0.11] | 0.14 | -2.50 | 0.384 
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 |      -0.07 | [-0.49,  0.34] | 0.13 | -0.58 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta0 |      -0.43 | [-0.84, -0.02] | 0.13 | -3.44 | 0.030 
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 |      -0.19 | [-0.61,  0.23] | 0.13 | -1.50 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta0 |      -0.38 | [-0.81,  0.05] | 0.13 | -2.90 | 0.137 
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 |      -0.17 | [-0.61,  0.26] | 0.13 | -1.33 | > .999
## DisfuncionGonadal0 Sexo0 Dieta1 | DisfuncionGonadal0 Sexo1 Dieta1 |       0.11 | [-0.27,  0.49] | 0.12 |  0.94 | > .999
## DisfuncionGonadal0 Sexo0 Dieta1 | DisfuncionGonadal1 Sexo0 Dieta1 |  -7.61e-03 | [-0.39,  0.37] | 0.12 | -0.07 | > .999
## DisfuncionGonadal0 Sexo0 Dieta1 | DisfuncionGonadal1 Sexo1 Dieta1 |       0.01 | [-0.38,  0.40] | 0.12 |  0.08 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 |       0.15 | [-0.26,  0.56] | 0.13 |  1.23 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 |       0.26 | [-0.15,  0.68] | 0.13 |  2.07 | 0.908 
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 |       0.15 | [-0.27,  0.57] | 0.13 |  1.15 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta0 |      -0.04 | [-0.47,  0.39] | 0.13 | -0.32 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 |       0.16 | [-0.27,  0.60] | 0.13 |  1.25 | > .999
## DisfuncionGonadal0 Sexo1 Dieta1 | DisfuncionGonadal1 Sexo1 Dieta1 |      -0.10 | [-0.50,  0.30] | 0.12 | -0.81 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 |       0.25 | [-0.13,  0.62] | 0.11 |  2.16 | 0.796 
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta0 |       0.09 | [-0.32,  0.50] | 0.13 |  0.73 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 |       0.36 | [-0.03,  0.74] | 0.12 |  3.05 | 0.093 
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 |       0.24 | [-0.14,  0.62] | 0.12 |  2.04 | 0.916 
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta0 |       0.05 | [-0.35,  0.44] | 0.12 |  0.41 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 |       0.26 | [-0.14,  0.65] | 0.12 |  2.12 | 0.834 
## DisfuncionGonadal1 Sexo0 Dieta1 | DisfuncionGonadal0 Sexo1 Dieta1 |       0.12 | [-0.27,  0.51] | 0.12 |  0.98 | > .999
## DisfuncionGonadal1 Sexo0 Dieta1 | DisfuncionGonadal1 Sexo1 Dieta1 |       0.02 | [-0.39,  0.42] | 0.12 |  0.14 | > .999
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 |       0.20 | [-0.20,  0.59] | 0.12 |  1.63 | > .999
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 |       0.31 | [-0.10,  0.71] | 0.12 |  2.48 | 0.384 
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 |       0.19 | [-0.22,  0.59] | 0.12 |  1.53 | > .999
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 |       0.21 | [-0.21,  0.62] | 0.13 |  1.62 | > .999
## 
## Marginal contrasts estimated at DisfuncionGonadal, Sexo, Dieta
## p-value adjustment method: Holm (1979)

Beta Diversidad

Vamos a calcular la Beta-diversidad.

dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>% 
  as.matrix() %>% 
  as_tibble(rownames = "Sample") %>% 
  inner_join(metadata2)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>% 
                     dplyr::select(Sample,contains("report")) %>%
                     column_to_rownames("Sample") %>% 
                     as.matrix() %>% 
                     as.dist(), 
                   annotation_row = dist %>% 
                     dplyr::select(Sample,Sexo,DisfuncionGonadal,Dieta) %>%
                     mutate(Sexo = as.factor(Sexo)) %>% 
                     mutate(DisfuncionGonadal = as.factor(DisfuncionGonadal)) %>% 
                     mutate(Dieta = as.factor(Dieta)) %>% 
                     column_to_rownames("Sample"))

X = otu_table %>% t()
Y = otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Sexo)) %>% pull(Sexo)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)

Y = otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% inner_join(metadata2 %>% dplyr::select(Sample,DisfuncionGonadal)) %>% pull(DisfuncionGonadal)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)

Y = otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% inner_join(metadata2 %>% dplyr::select(Sample,Dieta)) %>% pull(Dieta)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)

dist_ad <-dist %>% 
  dplyr::select(Sample,contains("report")) %>% 
  column_to_rownames("Sample") %>% 
  as.matrix() %>% 
  as.dist() 

adonis2(dist_ad ~ Sexo * DisfuncionGonadal * Dieta, dist %>% 
          dplyr::select(Sample,Sexo,DisfuncionGonadal,Dieta) %>% 
          column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_ad ~ Sexo * DisfuncionGonadal * Dieta, data = dist %>% dplyr::select(Sample, Sexo, DisfuncionGonadal, Dieta) %>% column_to_rownames("Sample"))
##                              Df SumOfSqs      R2       F Pr(>F)    
## Sexo                          1   0.1766 0.02566  2.1919  0.068 .  
## DisfuncionGonadal             1   0.2006 0.02913  2.4889  0.047 *  
## Dieta                         1   1.1689 0.16980 14.5064  0.001 ***
## Sexo:DisfuncionGonadal        1   0.0781 0.01134  0.9692  0.418    
## Sexo:Dieta                    1   0.2098 0.03047  2.6034  0.031 *  
## DisfuncionGonadal:Dieta       1   0.0919 0.01335  1.1403  0.328    
## Sexo:DisfuncionGonadal:Dieta  1   0.1237 0.01796  1.5345  0.167    
## Residual                     60   4.8348 0.70229                   
## Total                        67   6.8843 1.00000                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Análisis diferencial

• Qué taxas son las responsables de las diferencias

gamlss_table <- otu_table %>% 
  t() %>% 
  as_tibble(rownames = "Sample") %>% 
  pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>% 
  inner_join(metadata2) %>% 
  group_by(Sample) %>%
  mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
  ungroup() %>%
  group_by(NCBI) %>%
  mutate(Nzeros = sum(Reads ==0)) %>%
  ungroup() %>%
  mutate(NSamples = n_distinct(Sample)) %>%
  mutate(Freq = (Reads+1)/TotalReads) %>%
  mutate(ZeroFreq = Nzeros/NSamples) %>% 
  filter(ZeroFreq < 0.9) %>% 
  dplyr::select(NCBI,Reads,Freq,ZeroFreq,Sexo,DisfuncionGonadal,Dieta,Sample) %>% 
  mutate(Reads = Reads+1) %>% 
  nest_by(NCBI) %>% 
  mutate(model = list(gamlss(formula = Freq ~ Sexo*Dieta + DisfuncionGonadal, 
                             data = data, 
                             family = "BE", 
                             control = gamlss.control(n.cyc = 200, trace = F)))) %>% 
  mutate(test = list(emmeans(model, specs = c("Sexo","DisfuncionGonadal","Dieta"), data = data) %>% 
                       pairs() %>% 
                       as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>% 
  write_tsv("Cuestion5.2.tsv")
## Joining, by = "NCBI"
gamlss_table %>% 
  dplyr::select(NCBI,test) %>% 
  unnest(test) %>% 
  mutate(padj = p.adjust(p.value,method = "BH")) %>% 
  inner_join(table %>% 
               dplyr::select(NCBI,Name2) %>% 
               mutate(NCBI = as.character(NCBI)) %>% 
               distinct()) %>% 
  dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>% 
  #separate(contrast, c("A","B"), sep = " - ") %>% 
  mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",Name,NA)) %>% 
  ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() + facet_wrap(~contrast)
## Joining, by = "NCBI"
## Warning: Removed 28234 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 18 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 49 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 54 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 42 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 67 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 31 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 26 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 42 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 13 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 67 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 18 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps